{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# 1.获取数据\n",
    "# 2.基本数据处理\n",
    "# 2.1 缺失值处理\n",
    "# 2.2 确定特征值,目标值\n",
    "# 2.3 分割数据\n",
    "# 3.特征工程(标准化)\n",
    "# 4.机器学习(逻辑回归)\n",
    "# 5.模型评估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import classification_report, roc_auc_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# 1.获取数据\n",
    "names = ['Sample code number', 'Clump Thickness', 'Uniformity of Cell Size', 'Uniformity of Cell Shape',\n",
    "                   'Marginal Adhesion', 'Single Epithelial Cell Size', 'Bare Nuclei', 'Bland Chromatin',\n",
    "                   'Normal Nucleoli', 'Mitoses', 'Class']\n",
    "\n",
    "data = pd.read_csv(\"https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data\",\n",
    "                  names=names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": "     Sample code number  Clump Thickness  Uniformity of Cell Size  \\\n0               1000025                5                        1   \n1               1002945                5                        4   \n2               1015425                3                        1   \n3               1016277                6                        8   \n4               1017023                4                        1   \n..                  ...              ...                      ...   \n694              776715                3                        1   \n695              841769                2                        1   \n696              888820                5                       10   \n697              897471                4                        8   \n698              897471                4                        8   \n\n     Uniformity of Cell Shape  Marginal Adhesion  Single Epithelial Cell Size  \\\n0                           1                  1                            2   \n1                           4                  5                            7   \n2                           1                  1                            2   \n3                           8                  1                            3   \n4                           1                  3                            2   \n..                        ...                ...                          ...   \n694                         1                  1                            3   \n695                         1                  1                            2   \n696                        10                  3                            7   \n697                         6                  4                            3   \n698                         8                  5                            4   \n\n    Bare Nuclei  Bland Chromatin  Normal Nucleoli  Mitoses  Class  \n0             1                3                1        1      2  \n1            10                3                2        1      2  \n2             2                3                1        1      2  \n3             4                3                7        1      2  \n4             1                3                1        1      2  \n..          ...              ...              ...      ...    ...  \n694           2                1                1        1      2  \n695           1                1                1        1      2  \n696           3                8               10        2      4  \n697           4               10                6        1      4  \n698           5               10                4        1      4  \n\n[699 rows x 11 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Sample code number</th>\n      <th>Clump Thickness</th>\n      <th>Uniformity of Cell Size</th>\n      <th>Uniformity of Cell Shape</th>\n      <th>Marginal Adhesion</th>\n      <th>Single Epithelial Cell Size</th>\n      <th>Bare Nuclei</th>\n      <th>Bland Chromatin</th>\n      <th>Normal Nucleoli</th>\n      <th>Mitoses</th>\n      <th>Class</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1000025</td>\n      <td>5</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n      <td>3</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1002945</td>\n      <td>5</td>\n      <td>4</td>\n      <td>4</td>\n      <td>5</td>\n      <td>7</td>\n      <td>10</td>\n      <td>3</td>\n      <td>2</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1015425</td>\n      <td>3</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n      <td>2</td>\n      <td>3</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1016277</td>\n      <td>6</td>\n      <td>8</td>\n      <td>8</td>\n      <td>1</td>\n      <td>3</td>\n      <td>4</td>\n      <td>3</td>\n      <td>7</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1017023</td>\n      <td>4</td>\n      <td>1</td>\n      <td>1</td>\n      <td>3</td>\n      <td>2</td>\n      <td>1</td>\n      <td>3</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>694</th>\n      <td>776715</td>\n      <td>3</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>3</td>\n      <td>2</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>695</th>\n      <td>841769</td>\n      <td>2</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>696</th>\n      <td>888820</td>\n      <td>5</td>\n      <td>10</td>\n      <td>10</td>\n      <td>3</td>\n      <td>7</td>\n      <td>3</td>\n      <td>8</td>\n      <td>10</td>\n      <td>2</td>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>697</th>\n      <td>897471</td>\n      <td>4</td>\n      <td>8</td>\n      <td>6</td>\n      <td>4</td>\n      <td>3</td>\n      <td>4</td>\n      <td>10</td>\n      <td>6</td>\n      <td>1</td>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>698</th>\n      <td>897471</td>\n      <td>4</td>\n      <td>8</td>\n      <td>8</td>\n      <td>5</td>\n      <td>4</td>\n      <td>5</td>\n      <td>10</td>\n      <td>4</td>\n      <td>1</td>\n      <td>4</td>\n    </tr>\n  </tbody>\n</table>\n<p>699 rows × 11 columns</p>\n</div>"
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# 2.基本数据处理\n",
    "# 2.1 缺失值处理\n",
    "data = data.replace(to_replace=\"?\", value=np.nan)\n",
    "data = data.dropna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": "     Sample code number  Clump Thickness  Uniformity of Cell Size  \\\n0               1000025                5                        1   \n1               1002945                5                        4   \n2               1015425                3                        1   \n3               1016277                6                        8   \n4               1017023                4                        1   \n..                  ...              ...                      ...   \n694              776715                3                        1   \n695              841769                2                        1   \n696              888820                5                       10   \n697              897471                4                        8   \n698              897471                4                        8   \n\n     Uniformity of Cell Shape  Marginal Adhesion  Single Epithelial Cell Size  \\\n0                           1                  1                            2   \n1                           4                  5                            7   \n2                           1                  1                            2   \n3                           8                  1                            3   \n4                           1                  3                            2   \n..                        ...                ...                          ...   \n694                         1                  1                            3   \n695                         1                  1                            2   \n696                        10                  3                            7   \n697                         6                  4                            3   \n698                         8                  5                            4   \n\n    Bare Nuclei  Bland Chromatin  Normal Nucleoli  Mitoses  Class  \n0             1                3                1        1      2  \n1            10                3                2        1      2  \n2             2                3                1        1      2  \n3             4                3                7        1      2  \n4             1                3                1        1      2  \n..          ...              ...              ...      ...    ...  \n694           2                1                1        1      2  \n695           1                1                1        1      2  \n696           3                8               10        2      4  \n697           4               10                6        1      4  \n698           5               10                4        1      4  \n\n[683 rows x 11 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Sample code number</th>\n      <th>Clump Thickness</th>\n      <th>Uniformity of Cell Size</th>\n      <th>Uniformity of Cell Shape</th>\n      <th>Marginal Adhesion</th>\n      <th>Single Epithelial Cell Size</th>\n      <th>Bare Nuclei</th>\n      <th>Bland Chromatin</th>\n      <th>Normal Nucleoli</th>\n      <th>Mitoses</th>\n      <th>Class</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1000025</td>\n      <td>5</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n      <td>3</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1002945</td>\n      <td>5</td>\n      <td>4</td>\n      <td>4</td>\n      <td>5</td>\n      <td>7</td>\n      <td>10</td>\n      <td>3</td>\n      <td>2</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1015425</td>\n      <td>3</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n      <td>2</td>\n      <td>3</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1016277</td>\n      <td>6</td>\n      <td>8</td>\n      <td>8</td>\n      <td>1</td>\n      <td>3</td>\n      <td>4</td>\n      <td>3</td>\n      <td>7</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1017023</td>\n      <td>4</td>\n      <td>1</td>\n      <td>1</td>\n      <td>3</td>\n      <td>2</td>\n      <td>1</td>\n      <td>3</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>694</th>\n      <td>776715</td>\n      <td>3</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>3</td>\n      <td>2</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>695</th>\n      <td>841769</td>\n      <td>2</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>696</th>\n      <td>888820</td>\n      <td>5</td>\n      <td>10</td>\n      <td>10</td>\n      <td>3</td>\n      <td>7</td>\n      <td>3</td>\n      <td>8</td>\n      <td>10</td>\n      <td>2</td>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>697</th>\n      <td>897471</td>\n      <td>4</td>\n      <td>8</td>\n      <td>6</td>\n      <td>4</td>\n      <td>3</td>\n      <td>4</td>\n      <td>10</td>\n      <td>6</td>\n      <td>1</td>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>698</th>\n      <td>897471</td>\n      <td>4</td>\n      <td>8</td>\n      <td>8</td>\n      <td>5</td>\n      <td>4</td>\n      <td>5</td>\n      <td>10</td>\n      <td>4</td>\n      <td>1</td>\n      <td>4</td>\n    </tr>\n  </tbody>\n</table>\n<p>683 rows × 11 columns</p>\n</div>"
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 2.2 确定特征值,目标值\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": "   Clump Thickness  Uniformity of Cell Size  Uniformity of Cell Shape  \\\n0                5                        1                         1   \n1                5                        4                         4   \n2                3                        1                         1   \n3                6                        8                         8   \n4                4                        1                         1   \n\n   Marginal Adhesion  Single Epithelial Cell Size Bare Nuclei  \\\n0                  1                            2           1   \n1                  5                            7          10   \n2                  1                            2           2   \n3                  1                            3           4   \n4                  3                            2           1   \n\n   Bland Chromatin  Normal Nucleoli  Mitoses  \n0                3                1        1  \n1                3                2        1  \n2                3                1        1  \n3                3                7        1  \n4                3                1        1  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Clump Thickness</th>\n      <th>Uniformity of Cell Size</th>\n      <th>Uniformity of Cell Shape</th>\n      <th>Marginal Adhesion</th>\n      <th>Single Epithelial Cell Size</th>\n      <th>Bare Nuclei</th>\n      <th>Bland Chromatin</th>\n      <th>Normal Nucleoli</th>\n      <th>Mitoses</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>5</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n      <td>3</td>\n      <td>1</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>5</td>\n      <td>4</td>\n      <td>4</td>\n      <td>5</td>\n      <td>7</td>\n      <td>10</td>\n      <td>3</td>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>3</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>2</td>\n      <td>2</td>\n      <td>3</td>\n      <td>1</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>6</td>\n      <td>8</td>\n      <td>8</td>\n      <td>1</td>\n      <td>3</td>\n      <td>4</td>\n      <td>3</td>\n      <td>7</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>4</td>\n      <td>1</td>\n      <td>1</td>\n      <td>3</td>\n      <td>2</td>\n      <td>1</td>\n      <td>3</td>\n      <td>1</td>\n      <td>1</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = data.iloc[:, 1:-1]\n",
    "x.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": "0    2\n1    2\n2    2\n3    2\n4    2\nName: Class, dtype: int64"
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y = data[\"Class\"]\n",
    "y.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# 2.3 分割数据\n",
    "x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=22, test_size=0.2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# 3.特征工程(标准化)\n",
    "transfer = StandardScaler()\n",
    "x_train = transfer.fit_transform(x_train)\n",
    "x_test = transfer.fit_transform(x_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": "LogisticRegression()",
      "text/html": "<style>#sk-container-id-3 {color: black;}#sk-container-id-3 pre{padding: 0;}#sk-container-id-3 div.sk-toggleable {background-color: white;}#sk-container-id-3 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-3 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-3 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-3 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-3 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-3 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-3 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-3 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-3 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-3 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-3 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-3 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-3 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-3 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-3 div.sk-item {position: relative;z-index: 1;}#sk-container-id-3 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-3 div.sk-item::before, #sk-container-id-3 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-3 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-3 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-3 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-3 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-3 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-3 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-3 div.sk-label-container {text-align: center;}#sk-container-id-3 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-3 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-3\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LogisticRegression()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" checked><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LogisticRegression</label><div class=\"sk-toggleable__content\"><pre>LogisticRegression()</pre></div></div></div></div></div>"
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 4.机器学习(逻辑回归)\n",
    "estimator = LogisticRegression()\n",
    "estimator.fit(x_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "准确率为:\n",
      " 0.9854014598540146\n",
      "预测值为:\n",
      " [2 4 4 2 2 2 2 2 2 2 2 2 2 4 2 2 4 4 4 2 4 2 4 4 4 2 4 2 2 2 2 2 4 2 2 2 4\n",
      " 2 2 2 2 4 2 4 4 4 4 2 4 4 2 2 2 2 2 4 2 2 2 2 4 4 4 4 2 4 2 2 4 2 2 2 2 4\n",
      " 2 2 2 2 2 2 4 4 4 2 4 4 4 4 2 2 2 4 2 4 2 2 2 2 2 2 4 2 2 4 2 2 4 2 4 4 2\n",
      " 2 2 2 4 2 2 2 2 2 2 4 2 4 2 2 2 4 2 4 2 2 2 4 2 2 2]\n"
     ]
    }
   ],
   "source": [
    "# 5.模型评估\n",
    "# 5.1 准确率\n",
    "ret = estimator.score(x_test, y_test)\n",
    "print(\"准确率为:\\n\", ret)\n",
    "\n",
    "# 5.2 预测值\n",
    "y_pre = estimator.predict(x_test)\n",
    "print(\"预测值为:\\n\", y_pre)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "          良性       0.99      0.99      0.99        89\n",
      "          恶性       0.98      0.98      0.98        48\n",
      "\n",
      "    accuracy                           0.99       137\n",
      "   macro avg       0.98      0.98      0.98       137\n",
      "weighted avg       0.99      0.99      0.99       137\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# 5.3 精确率\\召回率指标评价\n",
    "ret = classification_report(y_test, y_pre, labels=(2, 4), target_names=(\"良性\", \"恶性\"))\n",
    "print(ret)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": "0.9839653558052434"
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 5.4 auc指标计算\n",
    "y_test = np.where(y_test>3, 1, 0)\n",
    "\n",
    "\n",
    "\n",
    "roc_auc_score(y_test, y_pre)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "outputs": [],
   "source": [
    "\n",
    "\n",
    "\n",
    "\n",
    "\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}